/*    Copyright 2010 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.subcommands;

import mosdi.fa.Alphabet;
import mosdi.fa.FiniteMemoryTextModel;
import mosdi.util.BitArray;
import mosdi.util.Iupac;
import mosdi.util.Log;
import mosdi.util.SequenceUtils;

/**
 * Subcommand "min-max-table": estimates a text model from a q-gram count
 * file and prints, for every IUPAC character, the minimum and maximum
 * probability reported by the text model's min/max probability tables.
 */
public class MinMaxTableSubcommand extends Subcommand {

	/** Width (in characters) to which the "X = {...}" label column is padded. */
	private static final int LABEL_WIDTH = 15;

	/** Largest character multiplicity enumerated when output is grouped (-g). */
	private static final int MAX_MULTIPLICITY = 4;

	/**
	 * Returns the usage text: the superclass usage line followed by the
	 * description of the mandatory argument and the available options.
	 */
	@Override
	public String usage() {
		return
		super.usage()+" [options] <q-gram-table-file>\n" +
		"\n" +
		"  <q-gram-table-file> File with q-gram counts to estimate text model from.\n" +
		"                      Can be generated from sequences using \"mosdi-utils count-qgrams\".\n" +
		"\n" +
		"Options:\n" +
		"  -g: group IUPAC characters according to their degeneracy (default:lexicographical)";
	}

	/** Returns a one-sentence description of what this subcommand computes. */
	@Override
	public String description() {
		return
		"Calculates a table with a minimum and maximum " +
		"(w.r.t. all text model states) probability " +
		"for each IUPAC character.";
	}

	/** Returns the name under which this subcommand is invoked. */
	@Override
	public String name() {
		return "min-max-table";
	}

	/**
	 * Parses the command line, builds the text model from the given q-gram
	 * table file and prints the min/max probability table.
	 *
	 * @param args command line arguments of this subcommand; exactly one
	 *             positional argument (the q-gram table file) is requested
	 *             from {@code parseOptions}
	 * @return 0 on success; if the text model cannot be built the error is
	 *         logged and the JVM is terminated with exit status 1
	 */
	@Override
	public int run(String[] args) {
		parseOptions(args, 1, "g");

		// Option dependencies
		// -- none --

		// Mandatory arguments
		String filename = getStringArgument(0);

		// Options
		// NOTE(review): presumably getBooleanOption returns the given default
		// (true = lexicographical order) unless -g is present, in which case
		// the value is flipped -- confirm against Subcommand.getBooleanOption.
		boolean lexOrder = getBooleanOption("g", true);

		FiniteMemoryTextModel textModel;
		try {
			textModel = SequenceUtils.buildTextModelFromQGramFile(filename);
		} catch (Exception e) {
			Log.errorln(e.toString());
			System.exit(1);
			// Only reached if System.exit is prevented from terminating the
			// JVM; guarantees the model is never dereferenced uninitialised.
			return 1;
		}

		BitArray[] genAlph = Iupac.asGeneralizedAlphabet();
		double[] minTab = textModel.minProbabilityTable(genAlph);
		double[] maxTab = textModel.maxProbabilityTable(genAlph);

		Log.println(Log.Level.STANDARD, "IUPAC char      min        max");
		if (lexOrder) {
			// Plain alphabet order.
			for (int c=0; c<genAlph.length; ++c) {
				printChar(c, minTab, maxTab);
			}
		} else {
			// Grouped output: one pass over the alphabet per multiplicity,
			// so characters with equal multiplicity are printed together.
			for (int mult=1; mult<=MAX_MULTIPLICITY; ++mult) {
				for (int c=0; c<genAlph.length; ++c) {
					if (Iupac.getCharacterMultiplicity(c)==mult) {
						printChar(c, minTab, maxTab);
					}
				}
			}
		}
		return 0;
	}

	/**
	 * Prints one table row of the form {@code X = {A,C}   min   max}.
	 *
	 * @param character index of the character in the IUPAC alphabet; also
	 *                  used as index into {@code minTab} and {@code maxTab}
	 * @param minTab    table of minimum probabilities, indexed by character
	 * @param maxTab    table of maximum probabilities, indexed by character
	 */
	public static void printChar(int character, double[] minTab, double[] maxTab) {
		Alphabet dnaAlphabet = Alphabet.getDnaAlphabet();
		Alphabet iupacAlphabet = Alphabet.getIupacAlphabet();
		// Local buffer only, so the unsynchronized StringBuilder suffices.
		StringBuilder sb = new StringBuilder();
		sb.append(iupacAlphabet.get(character)+" = {");
		BitArray ba = Iupac.iupacCharToBitArray(character);
		// Track the first member explicitly instead of inferring it from the
		// buffer length, so the separator logic does not depend on the
		// length of the label prefix.
		boolean firstMember = true;
		for (int c=0; c<dnaAlphabet.size(); ++c) {
			if (ba.get(c)) {
				if (!firstMember) {
					sb.append(',');
				}
				sb.append(dnaAlphabet.get(c));
				firstMember = false;
			}
		}
		sb.append("}");
		// Pad the label so that the numeric columns line up with the header.
		while (sb.length() < LABEL_WIDTH) {
			sb.append(' ');
		}
		// NOTE(review): assuming Log.printf follows java.util.Formatter, "%05f"
		// prints six decimals, so the minimum width of 5 never takes effect.
		Log.printf(Log.Level.STANDARD, "%s %05f   %05f\n", sb.toString(), minTab[character], maxTab[character]);
	}

}
